This is my Exam 3 document

Let's load the data and take a look at it.

library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.3
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(gganimate) 
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the pipe-delimited BioLog CSV into a data frame.
# stringsAsFactors = TRUE is spelled out because R >= 4.0 changed the default
# to FALSE; without it the text columns load as character vectors and the
# factor-based summary() / class() results no longer reproduce.
data <- read.csv(
  file = "EXAMS/Exam_3/BioLogData_Exam3.csv",
  sep = "|",
  stringsAsFactors = TRUE
)
# Per-column overview: level counts for factors, quartiles for numerics.
summary(data)
##        Sample.ID        Rep         Well        Dilution    
##  Clear_Creek:288   Min.   :1   A1     : 36   Min.   :0.001  
##  Soil_1     :288   1st Qu.:1   A2     : 36   1st Qu.:0.001  
##  Soil_2     :288   Median :2   A3     : 36   Median :0.010  
##  Waste_Water:288   Mean   :2   A4     : 36   Mean   :0.037  
##                    3rd Qu.:3   B1     : 36   3rd Qu.:0.100  
##                    Max.   :3   B2     : 36   Max.   :0.100  
##                                (Other):936                  
##                        Substrate       Hr_24            Hr_48       
##  2-Hydroxy Benzoic Acid     : 36   Min.   :0.0000   Min.   :0.0000  
##  4-Hydroxy Benzoic Acid     : 36   1st Qu.:0.0000   1st Qu.:0.0060  
##  D-Cellobiose               : 36   Median :0.0320   Median :0.2595  
##  D-Galactonic Acid γ-Lactone: 36   Mean   :0.1703   Mean   :0.4691  
##  D-Galacturonic Acid        : 36   3rd Qu.:0.1872   3rd Qu.:0.7220  
##  D-Glucosaminic Acid        : 36   Max.   :2.6500   Max.   :2.7850  
##  (Other)                    :936                                    
##      Hr_144       
##  Min.   :0.00000  
##  1st Qu.:0.04175  
##  Median :0.75200  
##  Mean   :0.92497  
##  3rd Qu.:1.67950  
##  Max.   :3.11600  
## 

Let's do some exploratory analysis.

# Scatterplot matrix of every column against every other column.
pairs(x = data)

# Report the class of each column, extracting by exact name with [[ ]].
class(data[["Sample.ID"]])
## [1] "factor"
class(data[["Rep"]])
## [1] "integer"
class(data[["Well"]])
## [1] "factor"
class(data[["Dilution"]])
## [1] "numeric"
class(data[["Substrate"]])
## [1] "factor"
class(data[["Hr_24"]])
## [1] "numeric"
class(data[["Hr_48"]])
## [1] "numeric"
class(data[["Hr_144"]])
## [1] "numeric"
## [1] "numeric"

Some regression models and summary statistics.

# Simple linear regression: Dilution is the response, Hr_24 the only predictor.
a <- lm(Dilution ~ Hr_24, data = data)
summary(a)
## 
## Call:
## lm(formula = Dilution ~ Hr_24, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03664 -0.03607 -0.02750  0.06237  0.06787 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.037644   0.001497  25.146   <2e-16 ***
## Hr_24       -0.003784   0.004173  -0.907    0.365    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04472 on 1150 degrees of freedom
## Multiple R-squared:  0.0007146,  Adjusted R-squared:  -0.0001544 
## F-statistic: 0.8223 on 1 and 1150 DF,  p-value: 0.3647
# Simple linear regression: Dilution is the response, Hr_48 the only predictor.
b <- lm(Dilution ~ Hr_48, data = data)
summary(b)
## 
## Call:
## lm(formula = Dilution ~ Hr_48, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03713 -0.03571 -0.02745  0.06198  0.06650 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.038127   0.001710  22.296   <2e-16 ***
## Hr_48       -0.002403   0.002324  -1.034    0.301    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04472 on 1150 degrees of freedom
## Multiple R-squared:  0.0009286,  Adjusted R-squared:  5.981e-05 
## F-statistic: 1.069 on 1 and 1150 DF,  p-value: 0.3014
# Simple linear regression: Dilution is the response, Hr_144 the only predictor.
# NOTE(review): the variable name `c` is the same as base::c(); it is kept here
# because code outside this section may refer to it.
c <- lm(Dilution ~ Hr_144, data = data)
summary(c)
## 
## Call:
## lm(formula = Dilution ~ Hr_144, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.04068 -0.03168 -0.02651  0.05956  0.07303 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.041682   0.001923   21.68  < 2e-16 ***
## Hr_144      -0.005062   0.001520   -3.33 0.000896 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04452 on 1150 degrees of freedom
## Multiple R-squared:  0.00955,    Adjusted R-squared:  0.008689 
## F-statistic: 11.09 on 1 and 1150 DF,  p-value: 0.0008963

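Of the three time points, only Hr_144 is a statistically significant predictor of Dilution (p ≈ 0.0009); Hr_24 (p ≈ 0.36) and Hr_48 (p ≈ 0.30) are not. Even so, the Hr_144 model explains less than 1% of the variance in Dilution (R² ≈ 0.0096).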

# Histogram of each numeric measurement column, in the original order.
# Looping over the column names replaces four copy-pasted calls and lets each
# plot be titled and labelled with the plain column name rather than the raw
# expression `data$<column>` that hist() would otherwise deparse.
for (column in c("Dilution", "Hr_144", "Hr_48", "Hr_24")) {
  hist(data[[column]], main = paste("Histogram of", column), xlab = column)
}

# Column names of the data frame.
colnames(data)
## [1] "Sample.ID" "Rep"       "Well"      "Dilution"  "Substrate" "Hr_24"    
## [7] "Hr_48"     "Hr_144"
# Boxplots of Dilution against Substrate, one panel per Sample.ID.
# Columns are referenced by bare name inside aes(): ggplot2 looks them up in
# `data` itself, which keeps the mapping aligned with the per-panel subsets
# made by facet_wrap() and gives clean axis labels instead of `data$Dilution`.
ggplot(data, aes(x = Dilution, y = Substrate)) +
  geom_boxplot() +
  facet_wrap(~Sample.ID)

# Histogram of the Hr_24 column, bars filled by Sample.ID.
# The column is mapped by bare name (not `data$Hr_24`) so ggplot2 resolves it
# from `data`, and the axis title reads "Hr_24".
fig1 <- ggplot(data, aes(x = Hr_24, fill = Sample.ID)) +
  geom_histogram()



# Histogram of the Hr_24 column, bars filled by Substrate.
# The column is mapped by bare name (not `data$Hr_24`) so ggplot2 resolves it
# from `data`, and the axis title reads "Hr_24".
fig2 <- ggplot(data, aes(x = Hr_24, fill = Substrate)) +
  geom_histogram()
# Convert fig1 to an interactive plotly widget; the top-level call
# auto-prints the result.
ggplotly(fig1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same conversion for fig2.
ggplotly(fig2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of the Hr_48 column, bars filled by Sample.ID.
# The column is mapped by bare name (not `data$Hr_48`) so ggplot2 resolves it
# from `data`, and the axis title reads "Hr_48".
fig3 <- ggplot(data, aes(x = Hr_48, fill = Sample.ID)) +
  geom_histogram()



# Histogram of the Hr_48 column, bars filled by Substrate.
# The column is mapped by bare name (not `data$Hr_48`) so ggplot2 resolves it
# from `data`, and the axis title reads "Hr_48".
fig4 <- ggplot(data, aes(x = Hr_48, fill = Substrate)) +
  geom_histogram()




# Histogram of the Hr_144 column, bars filled by Sample.ID.
# The column is mapped by bare name (not `data$Hr_144`) so ggplot2 resolves it
# from `data`, and the axis title reads "Hr_144".
fig5 <- ggplot(data, aes(x = Hr_144, fill = Sample.ID)) +
  geom_histogram()



# Histogram of the Hr_144 column, bars filled by Substrate.
# The column is mapped by bare name (not `data$Hr_144`) so ggplot2 resolves it
# from `data`, and the axis title reads "Hr_144".
fig6 <- ggplot(data, aes(x = Hr_144, fill = Substrate)) +
  geom_histogram()

# Convert each remaining histogram to an interactive plotly widget.
# Each call stays a separate top-level statement so its result is auto-printed.
# Hr_48 filled by Sample.ID.
ggplotly(fig3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Hr_48 filled by Substrate.
ggplotly(fig4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Hr_144 filled by Sample.ID.
ggplotly(fig5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Hr_144 filled by Substrate.
ggplotly(fig6)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.